#import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Read the missing-migrants CSV into a DataFrame and preview its first rows.
dataset_path = 'Global Missing Migrants Dataset.csv'
migration_df = pd.read_csv(dataset_path)
migration_df.head()
| Incident Type | Incident year | Reported Month | Region of Origin | Region of Incident | Country of Origin | Number of Dead | Minimum Estimated Number of Missing | Total Number of Dead and Missing | Number of Survivors | Number of Females | Number of Males | Number of Children | Cause of Death | Migration route | Location of death | Information Source | Coordinates | UNSD Geographical Grouping | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Incident | 2014 | January | Central America | North America | Guatemala | 1.0 | 0 | 1 | 0 | 0 | 1 | 0 | Mixed or unknown | US-Mexico border crossing | Pima Country Office of the Medical Examiner ju... | Pima County Office of the Medical Examiner (PC... | 31.650259, -110.366453 | Northern America |
| 1 | Incident | 2014 | January | Latin America / Caribbean (P) | North America | Unknown | 1.0 | 0 | 1 | 0 | 0 | 0 | 0 | Mixed or unknown | US-Mexico border crossing | Pima Country Office of the Medical Examiner ju... | Pima County Office of the Medical Examiner (PC... | 31.59713, -111.73756 | Northern America |
| 2 | Incident | 2014 | January | Latin America / Caribbean (P) | North America | Unknown | 1.0 | 0 | 1 | 0 | 0 | 0 | 0 | Mixed or unknown | US-Mexico border crossing | Pima Country Office of the Medical Examiner ju... | Pima County Office of the Medical Examiner (PC... | 31.94026, -113.01125 | Northern America |
| 3 | Incident | 2014 | January | Central America | North America | Mexico | 1.0 | 0 | 1 | 0 | 0 | 1 | 0 | Violence | US-Mexico border crossing | near Douglas, Arizona, USA | Ministry of Foreign Affairs Mexico, Pima Count... | 31.506777, -109.315632 | Northern America |
| 4 | Incident | 2014 | January | Northern Africa | Europe | Sudan | 1.0 | 0 | 1 | 2 | 0 | 1 | 0 | Harsh environmental conditions / lack of adequ... | NaN | Border between Russia and Estonia | EUBusiness (Agence France-Presse) | 59.1551, 28 | Northern Europe |
# Show the (rows, columns) shape together with each column's dtype.
migration_df.shape, migration_df.dtypes
((13020, 19), Incident Type object Incident year int64 Reported Month object Region of Origin object Region of Incident object Country of Origin object Number of Dead float64 Minimum Estimated Number of Missing int64 Total Number of Dead and Missing int64 Number of Survivors int64 Number of Females int64 Number of Males int64 Number of Children int64 Cause of Death object Migration route object Location of death object Information Source object Coordinates object UNSD Geographical Grouping object dtype: object)
# Count the missing (NaN) entries in every column.
migration_df.isna().sum()
Incident Type 0 Incident year 0 Reported Month 0 Region of Origin 22 Region of Incident 0 Country of Origin 8 Number of Dead 550 Minimum Estimated Number of Missing 0 Total Number of Dead and Missing 0 Number of Survivors 0 Number of Females 0 Number of Males 0 Number of Children 0 Cause of Death 0 Migration route 3021 Location of death 0 Information Source 8 Coordinates 36 UNSD Geographical Grouping 1 dtype: int64
# Columns removed from the analysis; the remaining missing-value counts are
# displayed again afterwards.
to_drop = [
    'Incident Type',
    'Region of Origin',
    'Migration route',
    'UNSD Geographical Grouping',
]
migration_df = migration_df.drop(columns=to_drop)
migration_df.isna().sum()
Incident year 0 Reported Month 0 Region of Incident 0 Country of Origin 8 Number of Dead 550 Minimum Estimated Number of Missing 0 Total Number of Dead and Missing 0 Number of Survivors 0 Number of Females 0 Number of Males 0 Number of Children 0 Cause of Death 0 Location of death 0 Information Source 8 Coordinates 36 dtype: int64
# Replace missing 'Number of Dead' values with 0 (only that column is
# touched), then re-check the missing-value counts.
migration_df = migration_df.fillna({'Number of Dead': 0})
migration_df.isna().sum()
Incident year 0 Reported Month 0 Region of Incident 0 Country of Origin 8 Number of Dead 0 Minimum Estimated Number of Missing 0 Total Number of Dead and Missing 0 Number of Survivors 0 Number of Females 0 Number of Males 0 Number of Children 0 Cause of Death 0 Location of death 0 Information Source 8 Coordinates 36 dtype: int64
# Discard every row that still has at least one missing value, then confirm
# that no column reports missing entries any more.
migration_df = migration_df.dropna(axis='index', how='any')
migration_df.isna().sum()
Incident year 0 Reported Month 0 Region of Incident 0 Country of Origin 0 Number of Dead 0 Minimum Estimated Number of Missing 0 Total Number of Dead and Missing 0 Number of Survivors 0 Number of Females 0 Number of Males 0 Number of Children 0 Cause of Death 0 Location of death 0 Information Source 0 Coordinates 0 dtype: int64
# Show the (rows, columns) shape after the cleaning steps above.
migration_df.shape
(12968, 15)
# Set the seaborn style and context, then draw a count plot with one bar per
# incident year.
sns.set_style("whitegrid")
sns.set_context("poster")
yearly_incident_grid = sns.catplot(
    data=migration_df,
    x='Incident year',
    kind='count',
    height=7,
    aspect=1.8,
)
# The grid holds a single facet, so its axes is the one to title and label.
yearly_incident_grid.ax.set_title('\n Total Yearly Number of Incidents Faced by Migrants')
yearly_incident_grid.ax.set_ylabel('Total Incident')
plt.show()
# Horizontal count plot: one bar per region of incident.
sns.set_style("whitegrid")
regional_incident_grid = sns.catplot(
    data=migration_df,
    y='Region of Incident',
    kind='count',
    height=8,
    aspect=1.5,
)
# The grid holds a single facet, so its axes is the one to title and label.
regional_incident_grid.ax.set_xlabel('Total Incident')
regional_incident_grid.ax.set_title('\n\n Number of Incidents Faced by Migrants by Region\n')
plt.show()
# One point per incident, grouped by year: first the total dead-and-missing
# count, then the count of females. Each entry is (column to plot, figure title).
per_incident_plots = [
    ('Total Number of Dead and Missing',
     '\n\n Total Number of Death or Missing per Incident\n'),
    ('Number of Females',
     '\n Number of Death or Missing per Incident of Female Migrants\n'),
]
for count_column, plot_title in per_incident_plots:
    sns.catplot(data=migration_df, x='Incident year', y=count_column, height=8, aspect=1.8)
    plt.title(plot_title)
    plt.show()
# Total dead-and-missing migrants per incident year, drawn as a bar chart.
# ax.bar puts one bar exactly on each year. A weighted ax.hist over the unique
# years only lines up when the number of years happens to equal the number of
# bins, and its bars are not centred on the year ticks.
yearly_dead_or_missing = (
    migration_df
    .groupby('Incident year', sort=False)['Total Number of Dead and Missing']
    .sum()
)
# Plain list of the yearly sums, ordered by each year's first appearance in
# the data (sort=False), so the name stays usable for later cells.
total_dead_or_missing = yearly_dead_or_missing.tolist()

fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(
    yearly_dead_or_missing.index,
    yearly_dead_or_missing.values,
    color='indigo',
    alpha=0.75,
)
# Tick every year that has data and nothing in between.
ax.set_xticks(yearly_dead_or_missing.index.tolist())
ax.set_xlabel('Incident Year')
ax.set_ylabel('Total Death or Missing')
ax.set_title('\nTotal Number of Dead or Missing Migrants\n')
plt.show()
# Yearly totals of female and male victims, drawn as grouped bars.
# Side-by-side bars keep both series fully visible and centred on their year.
# Two overlaid weighted histograms hide part of one series and only align
# with the years when the bin count happens to match the number of years.
gender_totals = (
    migration_df
    .groupby('Incident year', sort=False)[['Number of Males', 'Number of Females']]
    .sum()
)
# Plain lists of the yearly sums, ordered by each year's first appearance in
# the data (sort=False), so the names stay usable for later cells.
total_dead_or_missing_women = gender_totals['Number of Females'].tolist()
total_dead_or_missing_men = gender_totals['Number of Males'].tolist()

gender_years = gender_totals.index.to_numpy()
gender_bar_width = 0.4  # two bars per year -> 0.8 wide pair centred on the year

fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(
    gender_years - gender_bar_width / 2,
    total_dead_or_missing_men,
    width=gender_bar_width,
    color='slateblue',
    label='Male',
    alpha=0.75,
)
ax.bar(
    gender_years + gender_bar_width / 2,
    total_dead_or_missing_women,
    width=gender_bar_width,
    color='darkred',
    label='Female',
    alpha=0.75,
)
# Tick every year that has data and nothing in between.
ax.set_xticks(gender_years.tolist())
ax.set_xlabel('Incident Year')
ax.set_ylabel('Total Death or Missing')
ax.set_title('\nNumber of Dead or Missing Migrants by Gender\n')
plt.legend()
plt.show()
# Yearly total of child victims, drawn as a bar chart.
# ax.bar puts one bar exactly on each year. A weighted ax.hist over the unique
# years only lines up when the number of years happens to equal the number of
# bins, and its bars are not centred on the year ticks.
yearly_children = (
    migration_df
    .groupby('Incident year', sort=False)['Number of Children']
    .sum()
)
# Plain list of the yearly sums, ordered by each year's first appearance in
# the data (sort=False), so the name stays usable for later cells.
total_dead_or_missing_children = yearly_children.tolist()

fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(
    yearly_children.index,
    yearly_children.values,
    color='indigo',
    alpha=0.75,
)
# Tick every year that has data and nothing in between.
ax.set_xticks(yearly_children.index.tolist())
ax.set_xlabel('Incident Year')
ax.set_ylabel('Total Death or Missing')
ax.set_title('\nTotal Number of Dead or Missing Migrant Children\n')
plt.show()
# Horizontal count plot: number of incidents recorded for each cause of death.
cause_of_death_plot_options = {
    'data': migration_df,
    'y': 'Cause of Death',
    'kind': 'count',
    'height': 10,
    'aspect': 3.0,
}
sns.catplot(**cause_of_death_plot_options)
plt.show()
# Yearly totals of confirmed dead versus estimated missing, drawn as grouped
# bars. Side-by-side bars keep both series fully visible and centred on their
# year. Two overlaid weighted histograms hide part of one series and only
# align with the years when the bin count happens to match the number of years.
dead_and_missing_totals = (
    migration_df
    .groupby('Incident year', sort=False)[
        ['Number of Dead', 'Minimum Estimated Number of Missing']
    ]
    .sum()
)
# Plain lists of the yearly sums, ordered by each year's first appearance in
# the data (sort=False), so the names stay usable for later cells.
total_dead = dead_and_missing_totals['Number of Dead'].tolist()
total_missing = dead_and_missing_totals['Minimum Estimated Number of Missing'].tolist()

dead_missing_years = dead_and_missing_totals.index.to_numpy()
dead_missing_bar_width = 0.4  # two bars per year -> 0.8 wide pair centred on the year

fig, ax = plt.subplots(figsize=(15, 8))
ax.bar(
    dead_missing_years - dead_missing_bar_width / 2,
    total_dead,
    width=dead_missing_bar_width,
    color='purple',
    alpha=0.75,
    label='Death',
)
ax.bar(
    dead_missing_years + dead_missing_bar_width / 2,
    total_missing,
    width=dead_missing_bar_width,
    color='slateblue',
    alpha=0.75,
    label='Missing',
)
# Tick every year that has data and nothing in between.
ax.set_xticks(dead_missing_years.tolist())
ax.set_xlabel('Incident Year')
ax.set_ylabel('Total Death and Missing')
ax.set_title('\nTotal Number of Death and Missing by Year\n')
plt.legend(loc='upper center')
plt.show()
def changeToFloat(data):
    """Split "latitude, longitude" strings into two parallel lists of floats.

    The values are read by iterating over ``data`` rather than by indexing
    with ``0..len(data)-1``. For a pandas Series whose index has gaps (for
    example after ``dropna``), integer indexing is a label lookup that fails
    for the removed labels and never reaches labels beyond ``len(data) - 1``.

    Args:
        data: Iterable of coordinate strings such as ``"31.65, -110.36"``
            (a list or a pandas Series).

    Returns:
        A ``(latitude, longitude)`` tuple of two equally long lists of floats.
        Entries that are not strings, or that do not contain two
        comma-separated numbers, are skipped, so both lists stay aligned
        with each other.
    """
    latitude, longitude = [], []
    for value in data:
        try:
            # Only the first two comma-separated parts are used.
            lat_text, lon_text = value.split(',')[:2]
            lat_value, lon_value = float(lat_text), float(lon_text)
        except (AttributeError, ValueError):
            # AttributeError: not a string (e.g. NaN or None).
            # ValueError: fewer than two parts, or a part that is not a number.
            continue
        # Append only after both parts parsed, so the lists never diverge.
        latitude.append(lat_value)
        longitude.append(lon_value)
    return latitude, longitude
import folium

# Parse the 'Coordinates' column into parallel latitude / longitude lists.
lat, long = changeToFloat(migration_df['Coordinates'])

# World map centred on (0, 0) with a dark tile layer; one small crimson
# circle is added per incident.
incident_location = folium.Map(location=(0, 0), tiles='cartodbdark_matter', zoom_start=2.2)
for point_lat, point_long in zip(lat, long):
    folium.Circle(
        location=[point_lat, point_long],
        radius=10,
        color='crimson',
        fill_opacity=1.0,  # opacity is a 0-1 fraction; 1.0 means fully opaque
        fill_color='crimson',
    ).add_to(incident_location)
# Last expression of the cell: the notebook renders the map.
incident_location